*Project directory layout: every global below points to a folder under one root.
local root "Q:\dc1prhcmsas01\PU2"

global input  "`root'\data - sas"
global input2 "`root'\New folder"
global input3 "`root'\temp"
global temp   "`root'\temp_stata"
global log    "`root'\Log - Stata"
global output "`root'\data_stata"
global graphs "`root'\graphs_stata"

*Work from the code folder
cd "`root'\Code - Stata"

*Close any log left open by an earlier run, then start this script's log
capture log close
log using "$log/18_distributions_workers", replace

*Graph appearance
*NOTE(review): plotplain is presumably a user-installed scheme - confirm it is available on this machine
set scheme plotplain
graph set window fontface "Times New Roman"

***************************************************************************************************************
***************************Clean Worker Baseline for Treated Group***********************************************
***************************************************************************************************************

*Build the time==-1 baseline file for each pay type (sal = salaried, hr = hourly).
*A client_id / emp_pur_c pair is treated as an outlier, and removed entirely,
*when any of its records has base>1000000.
*NOTE(review): Stata ranks missing above every number, so a record with missing
*base also satisfies base>1000000 and flags its pair as an outlier. Confirm that
*this is intended.
foreach paytype in sal hr {
	*Extract the raw data
	use "$output/treated_`paytype'_worker.dta", clear

	*Collect the distinct outlier pairs in a temporary file
	preserve
		keep if base>1000000
		keep client_id emp_pur_c
		duplicates drop
		tempfile outliers
		save `outliers'
	restore

	*Baseline period only
	keep if time==-1

	*keep(master) retains only baseline rows that have no outlier match.
	*The earlier version dropped _merge==3 only, which left using-only rows
	*(outlier pairs without a time==-1 record) in the saved file as observations
	*with every non-key variable missing.
	*_merge is still created (always 1) so the saved variable list is unchanged.
	merge m:1 client_id emp_pur_c using `outliers', keep(master)

	gen total=base+ot
	save "$temp/worker_summary_baseline_`paytype'.dta", replace
}
	

*********************************************************************************************
*********************************Distribution of hours **************************************
*********************************************************************************************

*One histogram of hours (observations with hours<=100, unit-width bins, reference
*line at 40) per pay type: hourly first, then salaried. Each graph is exported as
*both eps and jpg.
foreach grp in hr sal {
	*Hourly output files carry the suffix hrly; salaried files carry sal
	local tag "`grp'"
	if "`grp'"=="hr" local tag "hrly"

	use "$temp/worker_summary_baseline_`grp'.dta", clear
	histogram hours if hours<=100, width(1) percent xtitle("Average Weekly Hours") xline(40)

	foreach fmt in eps jpg {
		graph export "$graphs/histogram_hours_`tag'.`fmt'", replace
	}
}


************************************************************************************************************
*********************************Distribution of total vs. gross pay **************************************
************************************************************************************************************

*Ratio of (base + ot) to gross pay, plotted for hourly and then salaried workers.
*Two graphs per group: the full range, and a zoomed view restricted to ratios <=2
*with bins of width 0.02. Each graph is exported as both eps and jpg.
foreach grp in hr sal {
	*Hourly output files carry the suffix hrly; salaried files carry sal
	local tag "`grp'"
	if "`grp'"=="hr" local tag "hrly"

	use "$temp/worker_summary_baseline_`grp'.dta", clear
	gen total_vs_gross=(base+ot)/gross

	*Full range
	histogram total_vs_gross, percent xtitle("Ratio of Base + OT to Gross Pay")
	foreach fmt in eps jpg {
		graph export "$graphs/histogram_total_vs_gross_`tag'.`fmt'", replace
	}

	*Zoomed view
	histogram total_vs_gross if total_vs_gross<=2, width(0.02) percent xtitle("Ratio of Base + OT to Gross Pay")
	foreach fmt in eps jpg {
		graph export "$graphs/histogram_total_vs_gross_`tag'_zoomed.`fmt'", replace
	}
}

************************************************************************************************
*********************************Distribution of Base Pay **************************************
************************************************************************************************
cap program drop plot_dist
program define plot_dist
	*Bins the data in memory by shifted base pay, averages one variable within
	*each bin, and exports a connected-line plot as eps and jpg.
	*The data in memory are replaced by the collapsed bin-level data.
	*
	*Positional arguments:
	*  move      - amount added to base before binning
	*  threshold - x position of the vertical reference line
	*  var       - variable averaged within each bin and plotted
	*  ytitle    - y-axis title
	*  ylabel    - y-axis label rule
	*  name      - suffix used in the exported file names
	args move threshold var ytitle ylabel name

	*Bins of width 20 on base + move, each labelled by its lower edge
	gen bin=floor((base+`move')/20)*20

	*id = number of nonmissing base values in the bin, as a share of all rows in memory
	bysort bin: egen id=count(base)
	count
	local n_obs=r(N)
	replace id=id/`n_obs'

	*One row per bin holding the mean of the requested variable
	collapse (mean) `var', by(bin)

	*Only bins from 0 through 2500 are drawn
	twoway connected `var' bin if inrange(bin, 0, 2500), xline(`threshold') xtitle("Weekly Base Pay") ytitle(`ytitle') ylabel(`ylabel')

	foreach fmt in eps jpg {
		graph export "$graphs/distribution_`name'.`fmt'", replace
	}
end
		
*Share of salaried workers by weekly base pay, Apr 2016 file.
*State-specific runs: the three lists below are parallel (state, shift passed to
*plot_dist, position of the reference line).
local states  ME CA NY AK
local shifts  5 0 5 0
local cutoffs 460 800 680 780

forvalues i = 1/4 {
	local st     : word `i' of `states'
	local shift  : word `i' of `shifts'
	local cutoff : word `i' of `cutoffs'
	local tag = lower("`st'")

	use state salaried base ot if state=="`st'" & salaried==1 & base>0 using "$temp\cleaned_201604", clear
	plot_dist `shift' `cutoff' id "Share of Salaried Workers" 0(0.005)0.03 base_`tag'2016
}

*FLSA Apr 2016: every state other than the four handled above
use state salaried base ot if !inlist(state, "ME", "CA", "NY", "AK") & salaried==1 & base>0 using "$temp\cleaned_201604", clear
plot_dist 5 460 id "Share of Salaried Workers" 0(0.005)0.03 base_flsa2016


************************************************************************************************************
*********************************Probability of OT Pay, Given Base Pay**************************************
************************************************************************************************************
*Probability of receiving overtime pay by weekly base pay, Apr 2016 file.
*prob_ot is 1 when ot>0 and 0 otherwise. It is left missing when ot is missing,
*so those rows do not enter the bin means. The earlier gen prob_ot=ot>0 coded a
*missing ot as 1, because Stata ranks missing above every number.
*NOTE(review): if a missing ot actually means no overtime was paid, recode it to
*0 instead - confirm against the data source.

*State-specific runs: the three lists below are parallel (state, shift passed to
*plot_dist, position of the reference line).
local states  ME CA NY AK
local shifts  5 0 5 0
local cutoffs 460 800 680 780

forvalues i = 1/4 {
	local st     : word `i' of `states'
	local shift  : word `i' of `shifts'
	local cutoff : word `i' of `cutoffs'
	local tag = lower("`st'")

	*AK uses a y axis running to 1; the other states stop at 0.25
	local yscale "0(0.05)0.25"
	if "`st'"=="AK" local yscale "0(0.05)1"

	use state salaried base ot if state=="`st'" & salaried==1 & base>0 using "$temp\cleaned_201604", clear
	gen byte prob_ot=ot>0 if !missing(ot)
	plot_dist `shift' `cutoff' prob_ot "Probability of Receiving Overtime Pay" `yscale' prob_ot_`tag'2016
}

*FLSA Apr 2016: every state other than the four handled above
use state salaried base ot if state!="ME" & state!="CA" & state!="NY" & state!="AK" & salaried==1 & base>0 using "$temp\cleaned_201604", clear
gen byte prob_ot=ot>0 if !missing(ot)
plot_dist 5 460 prob_ot "Probability of Receiving Overtime Pay" 0(0.05)0.25 prob_ot_flsa2016

log close